# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script produces ATT and dynamic treatment effect estimates 
# for both transitions during the period when the pre-test passes 
# (1996-2005) using the Callaway and Sant'Anna estimator. 
# These are combined with TWFE estimates to produce Table D.3. The dynamic treatment
# effects estimated by this script are used to produce Figures D.1 and D.2.
pacman::p_load(
  did,
  tidyverse,
  openxlsx,
  dplyr)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
dr_root <- paste0(arg1)
dr_data <- paste0(dr_root, "/data")
dr_output = paste0(dr_root, "/outputs")
dr_output_main = paste0(dr_root, "/outputs/main_text")
dr_output_app = paste0(dr_root, "/outputs/appendices")
dr_output_log = paste0(dr_root, "/outputs/logs")
dr_temp = paste0(dr_root, "/data/intermediate")
setwd(dr_temp)

########################################
#---------- Flood to Center Pivot or LEPA -----#
#PASSES FOR 1996 to 2005 years
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

#Filter to the period when pre-trend passes
wrg_flood_cplepa <- wrg_flood_cplepa %>% dplyr::filter(wua_year >=1996 & wua_year <= 2005)

#Create a new dataframe to store the results in 
df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                 dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                       "estimate", "se_estimate", 
                                                       "c_crit_val", "panel_mean_dep_var"))))
#Make one version for the ATT's
att_df <- df_template
#Make another for the cohort effects
grp_df <- df_template
#Next version for the event study coefficients
dyn_df <- df_template

#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (i in effect_order) {
  dep_var <- i
  #Make a df of just the dep var so we can get its mean value
  df_dep_var <- wrg_flood_cplepa %>% dplyr::select(all_of(dep_var))
  mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
  #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
  sharp_floodcplepa_attgt_nyt <- att_gt(yname = dep_var,
                                        tname = "wua_year",
                                        idname = "WR_GROUP",
                                        gname = "cohort_flood_cplepa",
                                        data = wrg_flood_cplepa,
                                        panel = TRUE,
                                        allow_unbalanced_panel = TRUE,
                                        xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                        est_method = "dr",
                                        control_group = c("notyettreated"),
                                        anticipation=0,
                                        clustervars = "WR_GROUP", 
                                        biters=1000
  )
  #Group specific effects to get overall ATT
  agg.gs <- aggte(sharp_floodcplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store ATT  
  att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
  #Store cohort effects
  agg.gs <- aggte(sharp_floodcplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  group_order <- c(agg.gs[["egt"]])
  g_est_lst <- c(agg.gs[["att.egt"]])
  g_se_lst <- c(agg.gs[["se.egt"]])
  g_c <- agg.gs$crit.val.egt
  for (i in 1:length(group_order)) {
    grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
  }
  #Dynamic effects
  agg.es <- aggte(sharp_floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store the att  
  att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
  #Store the event study coefficients
  agg.es <- aggte(sharp_floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  effect_order <- c(agg.es[["egt"]])
  est_lst <- c(agg.es[["att.egt"]])
  se_lst <- c(agg.es[["se.egt"]])
  e_c <- agg.es$crit.val.egt
  for (i in 1:length(effect_order)) {
    dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
  }
}
#Create the 95% ci variables
att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                              ci_95_ub = estimate + c_crit_val*se_estimate)
grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))

#Save as excel workbook
dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
write.xlsx(dataset_names, file.path(dr_temp, "/cs_floodtocporlepa_96_05_nyt.xlsx"))

#Remove excess objects
rm(wrg_flood_cplepa, att_df, grp_df, dyn_df, sharp_floodcplepa_attgt_nyt)
gc()

#----------------------------------------------# 
#---------- Center Pivot to LEPA --------------#
#----------------------------------------------# 
#Only 1996 to 2005
wrg_cp_lepa <- read.csv(file = file.path(dr_temp, "cp_lepa_prepped.csv"))
wrg_cp_lepa <- wrg_cp_lepa %>% dplyr::filter(wua_year >= 1996 & wua_year <= 2005)

#Create a new dataframe to store the results in 
df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                 dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                       "estimate", "se_estimate", 
                                                       "c_crit_val", "panel_mean_dep_var"))))
#Make one version for the ATT's
att_df <- df_template
#Make another for the cohort effects
grp_df <- df_template
#Next version for the event study coefficients
dyn_df <- df_template

#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (i in effect_order) {
  dep_var <- i
  #Make a df of just the dep var so we can get its mean value
  df_dep_var <- wrg_cp_lepa %>% dplyr::select(all_of(dep_var))
  mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
  #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
  sharp_cplepa_attgt_nyt <- att_gt(yname = dep_var,
                                   tname = "wua_year",
                                   idname = "WR_GROUP",
                                   gname = "cohort_cp_lepa",
                                   data = wrg_cp_lepa,
                                   panel = TRUE,
                                   allow_unbalanced_panel = TRUE,
                                   xformla=~hyd_cond+predev_sat+sandtotal+silttotal+init_af_used,
                                   est_method = "dr",
                                   control_group = c("notyettreated"),
                                   anticipation=0,
                                   clustervars = "WR_GROUP", 
                                   biters=1000
  )
  #Group specific effects to get overall ATT
  agg.gs <- aggte(sharp_cplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store ATT  
  att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
  #Store cohort effects
  agg.gs <- aggte(sharp_cplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  group_order <- c(agg.gs[["egt"]])
  g_est_lst <- c(agg.gs[["att.egt"]])
  g_se_lst <- c(agg.gs[["se.egt"]])
  g_c <- agg.gs$crit.val.egt
  for (i in 1:length(group_order)) {
    grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
  }

  #Dynamic effects
  agg.es <- aggte(sharp_cplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  #Store ATT  
  att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"],  agg.es["crit.val.egt"], mean_dep_var) 
  #Store event study coefficients
  agg.es <- aggte(sharp_cplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  effect_order <- c(agg.es[["egt"]])
  est_lst <- c(agg.es[["att.egt"]])
  se_lst <- c(agg.es[["se.egt"]])
  e_c <- agg.es$crit.val.egt
  for (i in 1:length(effect_order)) {
    dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
  }
}
#Create the 95% ci variables
att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                             ci_95_ub = estimate + c_crit_val*se_estimate)
grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
#Save as excel workbook
dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
write.xlsx(dataset_names, file.path(dr_temp, "/cs_cptolepa_96_05.xlsx"))

#Remove excess objects
rm(wrg_cp_lepa, att_df, grp_df, dyn_df, sharp_cplepa_attgt_nyt)
gc()

############### Export CS estimates for Table D.3 ###########################
#Merge the ATT estimates for the two transitions into one spreadsheet.
flood_cplepa_crop_att <- read.xlsx(paste0(dr_temp, "/cs_floodtocporlepa_96_05_nyt.xlsx"), sheet = "cs_att") %>%
  dplyr::filter(effect_estimated == "group_agg_effect_average") %>%
  dplyr::mutate(transition = "flood to center pivot or LEPA")
cp_lepa_crop_att <- read.xlsx(paste0(dr_temp, "/cs_cptolepa_96_05.xlsx"), sheet = "cs_att") %>%
  dplyr::filter(effect_estimated == "group_agg_effect_average") %>%
  dplyr::mutate(transition = "center pivot to LEPA")
output_df <- rbind(flood_cplepa_crop_att, cp_lepa_crop_att) 
#save
write.csv(output_df, file = file.path(dr_temp, "/cs_estimates_att_96_05.csv"))
